library(Seurat)
library(plyr)
library('biomaRt')
# Read the serialized object stored for the 10x-180831 dataset.
# NOTE(review): `data` is not referenced again in the visible code (later
# chunks use `sc.df`, `sc10x` and `seurobj`) -- confirm this load is still needed.
data <- readRDS('../output/10x-180831')
Data loading
# Read the tab-separated bulk expression table. The first column becomes the
# row names (used further down as the gene IDs to map); check.names=FALSE keeps
# the sample names in the header exactly as written in the file.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
bulk.df <- read.table(
  '../data/181128-All data cells_for pascal.txt',
  sep='\t',
  header=TRUE,
  stringsAsFactors=FALSE,
  row.names=1,
  check.names=FALSE
)
cannot open file '../data/181128-All data cells_for pascal.txt': No such file or directoryError in file(file, "rt") : cannot open the connection
Function from PT to map Ensembl gene IDs to gene symbols.
hs_add_gene_symbol_from_ensembl_ids <- function(
  df,
  colname_geneids_from="gene",
  colname_geneids_to="ensembl_gene_id",
  file.mapping='/projects/pytrik/sc_adipose/analyze_10x_fluidigm/gene_mapping/gene_mapping.GRCh37.87/Homo_sapiens.GRCh37.87.gene_name_version2ensembl.txt'
) {
  ### Map human Ensembl gene IDs to gene symbols using a tab-separated mapping file.
  ### INPUT
  # df: a tibble/data.frame with the column 'colname_geneids_from' holding human Ensembl gene IDs.
  # colname_geneids_from: name of the column in df that holds the Ensembl gene IDs.
  # colname_geneids_to: name of the column that receives the mapped gene symbols.
  #   NOTE: the default name ("ensembl_gene_id") is kept for backward compatibility,
  #   even though the column is filled with gene symbols.
  # file.mapping: path to the mapping file; it must contain the columns
  #   'ensembl_gene_id' and 'gene_name_optimal'.
  #   Alternative mapping used previously:
  #   "/raid5/projects/timshel/sc-genetics/sc-genetics/data/gene_annotations/GRCh38.ens_v90.gene_name_version2ensembl.txt.gz"
  ### OUTPUT
  # A tibble: df with the gene symbols added in the column 'colname_geneids_to' (row order unchanged).
  # Genes that did not map have NA values in that column.
  # If several input genes map to the same symbol, all but the first occurrence are set to NA.
  # Mapping statistics are printed along the way.
  df <- as.data.frame(df) # convert to df to ensure the below operations work.
  if (!colname_geneids_from %in% colnames(df)) {
    stop(sprintf("Column '%s' not found in df.", colname_geneids_from), call. = FALSE)
  }

  df.mapping <- suppressMessages(readr::read_tsv(file.mapping))
  missing_cols <- setdiff(c("ensembl_gene_id", "gene_name_optimal"), colnames(df.mapping))
  if (length(missing_cols) > 0) {
    stop(
      sprintf("Mapping file '%s' lacks required column(s): %s", file.mapping, paste(missing_cols, collapse=", ")),
      call. = FALSE
    )
  }

  genes_mapped <- df.mapping$gene_name_optimal[match(df[[colname_geneids_from]], df.mapping$ensembl_gene_id)]
  bool_dups <- duplicated(genes_mapped, incomparables=NA) # marks the second and later occurrences (larger subscripts) as duplicates
  # ^ incomparables=NA: 'excluding' NA when counting duplicated. NA values will not be compared. (That is, duplicated() returns FALSE for NA values)
  # ^ duplicated(c(1,1,2,NA,NA,NA)) returns FALSE TRUE FALSE FALSE TRUE TRUE.
  # ^ duplicated(c(1,1,2,NA,NA,NA), incomparables=NA) returns FALSE TRUE FALSE FALSE FALSE FALSE.
  print(sprintf("Number of genes mapped: %s",sum(!is.na(genes_mapped))))
  print(sprintf("Number of genes not mapped: %s",sum(is.na(genes_mapped)))) # number of not mapped genes
  print(sprintf("Number of genes with a NON-unique mapping (genes with duplicated ensembl gene IDs after mapping): %s",sum(bool_dups)))

  ### set duplicated entries (every occurrence after the first) to NA
  genes_mapped[bool_dups] <- NA
  print(sprintf("Total mapping stats: %s genes have no mapping (not mapped + duplicates) out of %s input genes.", sum(is.na(genes_mapped)), length(genes_mapped)))
  print(sprintf("Total genes mapped (non NA genes): %s", sum(!is.na(genes_mapped))))

  # Plain column assignment adds (or overwrites) the target column without
  # needing dplyr/rlang/magrittr to be attached; as_tibble() replaces the
  # deprecated as.tibble().
  df[[colname_geneids_to]] <- genes_mapped
  df <- tibble::as_tibble(df)
  # Rows are deliberately NOT dropped here; callers can filter on NA themselves:
  # filter(!is.na(gene)) %>% # remove all rows without mapping
  # filter(!duplicated(gene)) # keep only one of the duplicated pair (if any)
  print(sprintf("Returning tibble with the column '%s' added where all gene identifiers unique. Unmapped genes have NA values", colname_geneids_to))
  df
}
Filter the bulk data and the single-cell (sc) data so that both contain only the genes present in both. First, convert the row names of the bulk data from Ensembl IDs to gene symbols.
# Look up a gene symbol for every row name of bulk.df. The result keeps the
# input row order; 'gene_symbol' is NA for genes that did not map or whose
# symbol was already taken by an earlier row.
ids2symbols <- hs_add_gene_symbol_from_ensembl_ids(data.frame(gene=rownames(bulk.df)), colname_geneids_to='gene_symbol')
[1] "Number of genes mapped: 53203"
[1] "Number of genes not mapped: 6904"
[1] "Number of genes with a NON-unique mapping (genes with duplicated ensembl gene IDs after mapping): 0"
[1] "Total mapping stats: 6904 genes have no mapping (not mapped + duplicates) out of 60107 input genes."
[1] "Total genes mapped (non NA genes): 53203"
[1] "Returning tibble with the column 'gene_symbol' added where all gene identifiers unique. Unmapped genes have NA values"
# Keep only the bulk rows that received a gene symbol and use the symbols as
# row names. ids2symbols is row-aligned with bulk.df (it was built from its
# row names); the guard makes a mismatch fail loudly instead of recycling the
# logical index silently. drop=FALSE keeps a data.frame even with one column.
stopifnot(nrow(ids2symbols) == nrow(bulk.df))
bulk.df.filtered <- bulk.df[!is.na(ids2symbols$gene_symbol), , drop=FALSE]
rownames(bulk.df.filtered) <- ids2symbols$gene_symbol[!is.na(ids2symbols$gene_symbol)]
Then filter 10x data and bulk data on intersecting genes.
# Keep only the 10x rows (genes) that are also present in the bulk data.
# Fixed: the row names were read from the misspelled object `sc.fd`.
# %in% never returns NA, so the logical index can be used without which().
# NOTE(review): `sc.df` and `intersecting_genes` are not created in the visible
# code -- they must exist in the session before this line runs.
sc.df.filtered <- sc.df[rownames(sc.df) %in% intersecting_genes, ]
Error: object 'sc.df' not found
# Wrap the filtered bulk and 10x tables in Seurat objects, tagged with their
# project names ('Bulk' and '10x').
# NOTE(review): `bulk.df.filtered2` and `sc.df.filtered.cols` are not created in
# the visible code -- presumably the gene-intersected versions of the tables
# built above; confirm where they come from.
bulk <- CreateSeuratObject(bulk.df.filtered2, project='Bulk')
sc10x <- CreateSeuratObject(sc.df.filtered.cols, project='10x')
Add metadata
# Derive sample annotations from the sample names, which are split on '.':
# field 1 -> type (lower-cased), field 2 -> average flag, field 3 -> stimulation.
# NOTE(review): assumes every sample name has the same number of '.'-separated
# fields; rbind() would recycle shorter ones -- confirm against the input header.
x <- strsplit(rownames(bulk@meta.data), '\\.')
metadata <- data.frame(do.call(rbind, x))
bulk@meta.data$type <- tolower(metadata$X1)
bulk@meta.data$stimulated <- metadata$X3
bulk@meta.data$average <- metadata$X2
# Drop the samples flagged 'AVG'. The sample names are taken from the meta.data
# row names (as the other SubsetData calls in this file do) rather than from
# rownames(bulk), which does not return the sample names of the object.
bulk <- SubsetData(bulk, cells.use=rownames(bulk@meta.data)[bulk@meta.data$average != 'AVG'])
# The helper column is no longer needed once the AVG samples are removed.
bulk@meta.data$average <- NULL
bulk@meta.data
#saveRDS(bulk, '../output/bulk-seurat')
# Run PCA on the bulk object with default settings and keep the returned object.
# NOTE(review): the run warns "You're computing too large a percentage of total
# singular values" -- presumably the requested number of PCs is close to the
# number of bulk samples; consider requesting fewer PCs.
bulk <- RunPCA(bulk)
You're computing too large a percentage of total singular values, use a standard svd instead.
[1] "PC1"
[1] "ECHDC2" "MVK" "ACY1" "NAA40" "SUOX" "ANO8" "TXNRD2"
[8] "OPLAH" "TM7SF2" "DGCR6" "SREBF1" "PPP1R16A" "ELMOD3" "THRSP"
[15] "PLEKHH3" "HIST2H2BE" "ACADS" "ACSS2" "D2HGDH" "ECHS1" "HOMEZ"
[22] "PNPLA3" "ZBTB7B" "HRASLS5" "ACHE" "MMP15" "NECAB3" "RAB40C"
[29] "LPIN1" "YBX2"
[1] ""
[1] "SEC24D" "CUL4B" "CBLB" "EPS8" "EIF1B" "URB1" "BAG2"
[8] "GARS" "UGDH" "CREB3L2" "C12orf23" "PPRC1" "SLC35C1" "DKK1"
[15] "FHL2" "HSPH1" "RFK" "SASH1" "NCOA7" "MEDAG" "RAI14"
[22] "ITPRIP" "FAM57A" "NIP7" "SLC25A32" "EDNRB" "TES" "TXNRD1"
[29] "FGF2" "DLC1"
[1] ""
[1] ""
[1] "PC2"
[1] "COL3A1" "HEXDC" "COL8A1" "NRN1" "HOXC8" "COL11A1" "HOXC10"
[8] "COL5A1" "NRCAM" "STON1" "KIAA1324L" "FARP1" "DPT" "SLIT2"
[15] "COL1A1" "THY1" "CRNDE" "HDAC9" "LRP5" "EMILIN1" "CDC42EP3"
[22] "MYO1D" "FBN2" "MYLK" "HOXC6" "HOXB6" "ITPR1" "NRP2"
[29] "SMAP2" "MICAL1"
[1] ""
[1] "CDK15" "MAP3K1" "SLC4A4" "GPD2" "HSPB8" "ACSL5" "AGT" "KCNK3"
[9] "MEST" "APEX2" "FABP3" "CES2" "ADPRH" "SIX1" "PLA2G4A" "FHL1"
[17] "CYCS" "SLC24A3" "CA12" "PFKM" "DNAJA4" "NDUFAF4" "KLHL29" "OSR1"
[25] "MFI2" "ALPL" "APLN" "PPA1" "TUBA4A" "SLC12A7"
[1] ""
[1] ""
[1] "PC3"
[1] "CBX6" "CACHD1" "DPYSL2" "TENC1" "IL16" "PRRX1" "CBR3"
[8] "TGFB2" "SNAI2" "CORO2B" "FAXDC2" "SLC12A6" "PSIP1" "PXN"
[15] "UACA" "EEF2K" "AHRR" "CALHM2" "C1RL" "IFFO1" "BBX"
[22] "UBE2L6" "ISLR" "CORO6" "IFIT1" "CARHSP1" "PPM1K" "SMG6"
[29] "CBX7" "RAMP2-AS1"
[1] ""
[1] "ACSL4" "C19orf12" "FZD4" "DIXDC1" "PITPNC1" "PMEPA1" "PRKAG2"
[8] "CPEB4" "SMOX" "KIAA0922" "TXLNG" "TUBB2A" "SLC22A3" "FABP5"
[15] "FABP4" "RASSF3" "TACC2" "RCE1" "HCAR2" "SLC25A13" "SLC19A3"
[22] "PHLDB2" "EHD4" "FAM89A" "GAB2" "DUSP4" "AMIGO2" "SYAP1"
[29] "ZRANB1" "CDC42EP4"
[1] ""
[1] ""
[1] "PC4"
[1] "TNFRSF21" "PRSS23" "SLC6A6" "PDPR" "FIGF" "RPA2" "C7"
[8] "TMEM37" "ABCG1" "SRGAP1" "SERPINB8" "CD97" "HMGA1" "ALDH1A3"
[15] "CLSTN2" "PANX1" "LINC00152" "ITGA3" "ELK3" "ABCA1" "CDH13"
[22] "USP53" "SH2B3" "THBD" "CBFB" "MICAL2" "IRF1" "EFHD1"
[29] "GXYLT2" "WDR35"
[1] ""
[1] "KIAA1161" "GABPB2" "NCALD" "PLAGL1" "TMUB1" "SYNM" "HOOK2"
[8] "NHSL1" "MAST4" "EFS" "APBB3" "AMPH" "SLC16A14" "CAB39L"
[15] "NXN" "CILP" "LIN7A" "ARMCX2" "ASAP3" "USP54" "CDKL5"
[22] "NAP1L5" "PPDPF" "SLC6A15" "ADAM33" "PITPNM1" "CLIP2" "SH3PXD2A"
[29] "SYNE2" "GADD45G"
[1] ""
[1] ""
[1] "PC5"
[1] "PPM1L" "GSTM1" "TENM4" "PHKA1" "MRO"
[6] "PCK2" "TRIM16L" "PSAT1" "AFAP1L1" "PRG4"
[11] "RP1-193H18.3" "PM20D1" "ABCC3" "SLC2A1" "CALCRL"
[16] "CPM" "GSTM2" "STARD7" "MET" "LIN7A"
[21] "ATF5" "CYP4V2" "SESN3" "DIRAS1" "TNFRSF10D"
[26] "PHKG1" "ITGA3" "LRP8" "PHYHD1" "TBX15"
[1] ""
[1] "AEBP1" "MAF" "SMOC2" "FLRT2" "LRRC32" "CTSK" "SLC40A1"
[8] "RUNX1" "FBLN5" "THBS2" "LPCAT1" "ACKR4" "FAM180A" "CLEC2B"
[15] "TMEM200A" "IFITM1" "NFKB2" "GPR176" "TMEM150C" "SEMA5A" "TMEM59L"
[22] "FBLN1" "MFAP2" "FAP" "ATHL1" "NFATC4" "NOVA1" "BASP1"
[29] "GPRC5B" "KCNK2"
[1] ""
[1] ""
# Build one label per bulk sample from its type and stimulation status,
# e.g. "brown_NE-stim". The previous apply() only printed both fields and
# returned the value of the last print(), so the "combined" vector held the
# stimulation status alone. paste() is vectorized and needs no row-wise apply.
# NOTE(review): the "_" separator is an assumption -- adjust if downstream code
# expects another format.
type_stimulated_combined <- paste(bulk@meta.data$type, bulk@meta.data$stimulated, sep="_")
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
[1] "brown"
[1] "Non"
[1] "brown"
[1] "NE-stim"
[1] "white"
[1] "Non"
[1] "white"
[1] "NE-stim"
#saveRDS(bulk, '../output/bulk-seurat')
Add metadata to sc10x Seurat object.
# Reorder all_metadata so that its rows follow the cell order of sc10x.
# The cell names are taken from the meta.data row names (as the SubsetData
# calls in this file do) rather than from rownames(sc10x), which does not
# return the cell names of the object. drop=FALSE keeps the table shape even
# if all_metadata has a single column.
# NOTE(review): `all_metadata` is not created in the visible code.
all_metadata <- all_metadata[match(rownames(sc10x@meta.data), rownames(all_metadata)), , drop=FALSE]
Error: object 'all_metadata' not found
#saveRDS(sc10x, '../output/10x-180831-filtered-genes-bulk')
Performed preprocessing (normalization, scaling, PCA, t-SNE) on the merged Seurat object (bulk + sc)
Aligned.
# Restore the saved GTEx v7 workspace objects from the .RData file.
# NOTE(review): presumably this is what defines `seurat_obj`, which is used
# right below -- confirm the object name stored in the file.
load('/data/rna-seq/gtex/v7-seurat_objs/gtex.seurat_obj.gene_tpm.RData')
# Tally the GTEx samples per SMTS value (the tissue labels printed below).
table(seurat_obj@meta.data$SMTS)
Adipose Tissue Adrenal Gland Bladder Blood
797 190 11 537
Blood Vessel Brain Breast Cervix Uteri
913 1671 290 11
Colon Esophagus Fallopian Tube Heart
507 1021 7 600
Kidney Liver Lung Muscle
45 175 427 564
Nerve Ovary Pancreas Pituitary
414 133 248 183
Prostate Salivary Gland Skin Small Intestine
152 97 1203 137
Spleen Stomach Testis Thyroid
162 262 259 446
Uterus Vagina
111 115
# Tally the GTEx samples per SMTSD value (the finer tissue-site labels printed
# below, e.g. the two adipose depots).
table(seurat_obj@meta.data$SMTSD)
Adipose - Subcutaneous
442
Adipose - Visceral (Omentum)
355
Adrenal Gland
190
Artery - Aorta
299
Artery - Coronary
173
Artery - Tibial
441
Bladder
11
Brain - Amygdala
100
Brain - Anterior cingulate cortex (BA24)
121
Brain - Caudate (basal ganglia)
160
Brain - Cerebellar Hemisphere
136
Brain - Cerebellum
173
Brain - Cortex
158
Brain - Frontal Cortex (BA9)
129
Brain - Hippocampus
123
Brain - Hypothalamus
121
Brain - Nucleus accumbens (basal ganglia)
147
Brain - Putamen (basal ganglia)
124
Brain - Spinal cord (cervical c-1)
91
Brain - Substantia nigra
88
Breast - Mammary Tissue
290
Cells - EBV-transformed lymphocytes
130
Cells - Transformed fibroblasts
343
Cervix - Ectocervix
6
Cervix - Endocervix
5
Colon - Sigmoid
233
Colon - Transverse
274
Esophagus - Gastroesophageal Junction
244
Esophagus - Mucosa
407
Esophagus - Muscularis
370
Fallopian Tube
7
Heart - Atrial Appendage
297
Heart - Left Ventricle
303
Kidney - Cortex
45
Liver
175
Lung
427
Minor Salivary Gland
97
Muscle - Skeletal
564
Nerve - Tibial
414
Ovary
133
Pancreas
248
Pituitary
183
Prostate
152
Skin - Not Sun Exposed (Suprapubic)
387
Skin - Sun Exposed (Lower leg)
473
Small Intestine - Terminal Ileum
137
Spleen
162
Stomach
262
Testis
259
Thyroid
446
Uterus
111
Vagina
115
Whole Blood
407
### CONVERT IDs TO SYMBOLS
# Map the row names of gtex_adipose_df to gene symbols. This overwrites the
# `ids2symbols` table computed earlier for the bulk data.
# NOTE(review): `gtex_adipose_df` is not created in the visible code --
# presumably the expression table of the adipose GTEx samples; confirm.
ids2symbols <- hs_add_gene_symbol_from_ensembl_ids(data.frame(gene=rownames(gtex_adipose_df)), colname_geneids_to='gene_symbol')
[1] "Number of genes mapped: 54038"
[1] "Number of genes not mapped: 2164"
[1] "Number of genes with a NON-unique mapping (genes with duplicated ensembl gene IDs after mapping): 0"
[1] "Total mapping stats: 2164 genes have no mapping (not mapped + duplicates) out of 56202 input genes."
[1] "Total genes mapped (non NA genes): 54038"
`as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
[90mThis warning is displayed once per session.[39m
[1] "Returning tibble with the column 'gene_symbol' added where all gene identifiers unique. Unmapped genes have NA values"
### CONVERT IDs TO SYMBOLS
# NOTE(review): this repeats the identical call made just above and recomputes
# the same mapping -- one of the two chunks can probably be removed.
ids2symbols <- hs_add_gene_symbol_from_ensembl_ids(data.frame(gene=rownames(gtex_adipose_df)), colname_geneids_to='gene_symbol')
[1] "Number of genes mapped: 54038"
[1] "Number of genes not mapped: 2164"
[1] "Number of genes with a NON-unique mapping (genes with duplicated ensembl gene IDs after mapping): 0"
[1] "Total mapping stats: 2164 genes have no mapping (not mapped + duplicates) out of 56202 input genes."
[1] "Total genes mapped (non NA genes): 54038"
[1] "Returning tibble with the column 'gene_symbol' added where all gene identifiers unique. Unmapped genes have NA values"
# Keep only the GTEx rows (genes) that received a gene symbol.
# NOTE(review): unlike the bulk step, the row names of gtex_adipose.filtered are
# not replaced by the gene symbols here -- confirm that happens elsewhere.
gtex_adipose.filtered <- gtex_adipose_df[!is.na(ids2symbols$gene_symbol),]
# Attach the sample annotations of gtex_adipose to the gtex object.
# NOTE(review): neither `gtex` nor `gtex_adipose` is created in the visible code.
gtex <- AddMetaData(gtex, gtex_adipose@meta.data)
# Sanity check: sample counts per SMTSD value after adding the metadata.
table(gtex@meta.data$SMTSD)
Adipose - Subcutaneous Adipose - Visceral (Omentum)
442 355
# Merge the bulk and GTEx objects into one Seurat object, then scale it.
# The stray trailing comma (an empty third argument) has been removed; the
# call relies on the defaults of MergeSeurat exactly as before.
merged_bulk_gtex <- MergeSeurat(bulk, gtex)
merged_bulk_gtex <- ScaleData(merged_bulk_gtex)
Scaling data matrix
|
| | 0%
|
|================================================================================| 100%
# Select variable genes on the merged bulk + GTEx object (default settings).
# NOTE(review): here this runs after ScaleData, whereas the later `merged`
# pipeline calls FindVariableGenes before ScaleData -- confirm the order is
# intentional.
merged_bulk_gtex <- FindVariableGenes(merged_bulk_gtex)
Calculating gene means
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variance to mean ratios
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Run PCA on the merged bulk + GTEx object with default settings; the top
# genes per component are printed below.
merged_bulk_gtex <- RunPCA(merged_bulk_gtex)
[1] "PC1"
[1] "BIRC6" "DMXL1" "MIA3" "HUWE1" "TAOK1" "IREB2" "NCKAP1"
[8] "SMG1" "DYNC1H1" "SBNO1" "OPA1" "SETD7" "HOOK3" "HECTD1"
[15] "KMT2C" "NOTCH2" "DOCK7" "MAPK1" "PRKDC" "TNPO1" "TRIP12"
[22] "SNX13" "SCAF11" "MED13" "KIAA1715" "NF1" "KIF1B" "RAB3GAP2"
[29] "CAND1" "GAPVD1"
[1] ""
[1] "ZFP36" "JUNB" "XBP1" "PCDH1" "ID1" "TMEM88" "IGFBP2"
[8] "SOX17" "GADD45B" "NOTCH4" "NOSTRIN" "NDUFA4L2" "EPHA2" "PTP4A3"
[15] "VAMP8" "C11orf96" "CSRNP1" "PSMB9" "ICAM3" "FLT4" "PPAN"
[22] "RASGRP2" "ARID5A" "TMEM255B" "FOS" "ARAP3" "PTPRB" "TYROBP"
[29] "LAPTM5" "HSPB1"
[1] ""
[1] ""
[1] "PC2"
[1] "SLC22A17" "DTX3" "OLFML1" "CD27-AS1" "ARMCX2" "GLT8D2" "PRRT2"
[8] "AMT" "BTN3A3" "BTN3A1" "SEPT5" "C1orf54" "DPYSL3" "CHRD"
[15] "RERG" "ISYNA1" "MAP3K12" "SCARA3" "RARG" "CDR2" "ASPN"
[22] "GUCY1B3" "PSMB9" "EPS8L2" "FAM102A" "IL11RA" "ATP1B2" "ABCA10"
[29] "FAS" "CPXM2"
[1] ""
[1] "MARC1" "ACSL1" "ECHDC3" "CIDEC" "RP1-193H18.3"
[6] "ADIPOQ" "AZGP1" "FASN" "RBP4" "SLC25A1"
[11] "MRAP" "ACACB" "PC" "GPT" "ABCD1"
[16] "HEBP2" "PFKFB3" "GYG2" "GGCT" "PGD"
[21] "SLC19A3" "GLUL" "CPM" "GPD1" "NAT8L"
[26] "HSPB7" "RP11-134G8.8" "TMEM132C" "SLC6A8" "SHMT1"
[1] ""
[1] ""
[1] "PC3"
[1] "HOXC9" "TBX15" "HOXC10" "LGI4" "NTRK2"
[6] "RP11-983P16.4" "LMOD1" "HOXC6" "NOVA1" "CLEC3B"
[11] "SMOC1" "RCAN2" "WISP2" "STARD9" "ILF3-AS1"
[16] "OLFML2A" "MLPH" "CAMK2G" "SNX7" "XG"
[21] "EBF3" "ACTA2-AS1" "FAM69B" "KCNAB1" "RP11-141O15.1"
[26] "CAB39L" "PPL" "RRAD" "TRIM52-AS1" "CXCL14"
[1] ""
[1] "GFPT2" "RARRES1" "TIMP1" "PIM1" "GATA6" "ALDH1A3" "PDPN"
[8] "HP" "FKBP11" "NAMPT" "THBS1" "WT1" "HAS1" "KRT8"
[15] "SNHG15" "SGK1" "RDH10" "CLCF1" "TNFSF14" "KRT18" "PVR"
[22] "NR2F1" "SERPINB9" "C7" "ATP1B3" "GPRC5A" "PLAUR" "CD200"
[29] "SLPI" "XBP1"
[1] ""
[1] ""
[1] "PC4"
[1] "HOXB3" "HOXB-AS1" "HOXA4" "AC022007.5" "MMP15" "FGF1"
[7] "RGS11" "TSTD1" "KCNIP2" "BNC1" "VLDLR" "HOXA5"
[13] "AC005550.4" "RASSF7" "MEIS1" "CARNS1" "MDFI" "MST1"
[19] "C14orf180" "C19orf33" "KLK11" "TYRO3" "MSLN" "BCHE"
[25] "PTN" "PRICKLE4" "ITLN1" "QPRT" "ANXA3" "SLC40A1"
[1] ""
[1] "ELL" "UPP1" "SPHK1" "SERPINE1" "ELL2" "SLC39A14" "SPSB1"
[8] "HAPLN3" "PANX1" "OSMR" "GADD45B" "UAP1" "ETV6" "FEM1C"
[15] "RND3" "PCSK7" "CRISPLD2" "PTX3" "ITPKC" "TNC" "BASP1"
[22] "SLCO4A1" "C11orf96" "ANPEP" "FHL3" "MT1X" "NAMPT" "C10orf10"
[29] "SAP30" "SBNO2"
[1] ""
[1] ""
[1] "PC5"
[1] "TYROBP" "DOK2" "CD68" "IFI30" "ITGB2" "HAVCR2" "LRRC25"
[8] "UNC93B1" "PTPN6" "ADAP2" "ARHGAP30" "MSR1" "HCST" "CSF1R"
[15] "LCP1" "FPR3" "CYTH4" "NCKAP1L" "LAIR1" "LGALS9" "C3AR1"
[22] "WAS" "CCR1" "C1QB" "RNASET2" "MS4A7" "FGD2" "PLCB2"
[29] "GM2A" "CECR1"
[1] ""
[1] "NPNT" "HEYL" "SORBS2" "CACNA1H" "CASQ2" "PPP1R14A" "CNN1"
[8] "SPEG" "LTBP1" "HES4" "AMIGO2" "PLN" "ADRA2C" "TPM2"
[15] "SLMAP" "MYH11" "ITGA8" "EFHD1" "CSRP1" "PDLIM3" "C11orf96"
[22] "CSDC2" "SLC38A1" "TMOD1" "RNF152" "SYNM" "TPD52L1" "SGCA"
[29] "SLCO2A1" "LDB3"
[1] ""
[1] ""
# Relabel the samples whose orig.ident is 'SeuratProject' as 'GTEx'
# (presumably the GTEx object was created without a project name -- confirm).
# NOTE(review): if orig.ident is stored as a factor, assigning a label that is
# not an existing level produces NA with a warning -- confirm it is character.
merged_bulk_gtex@meta.data$orig.ident[merged_bulk_gtex@meta.data$orig.ident == 'SeuratProject'] <- 'GTEx'
# Persist the merged bulk + GTEx object.
saveRDS(merged_bulk_gtex, '../output/bulk-gtex-merged')
Merge with 10x data
#TODO for bulk analysis:
#Create subset of 10x-180831 data: 100 cells from oxidative, 100 cells from ECM branch. Then align with bulk and GTEx.
# Keep only the cells labelled as top-10 ECM or top-10 oxidative branch cells.
# %in% is used instead of chained `==` comparisons: `==` yields NA for cells
# without a branch label, which would put NA entries into cells.use.
# NOTE(review): `seurobj` is not created in the visible code.
subset <- SubsetData(
  seurobj,
  cells.use=rownames(seurobj@meta.data)[seurobj@meta.data$branch_high_res %in% c('ECM_top10', 'oxidative_top10')]
)
# Combine the selected single cells with the merged bulk + GTEx object.
merged <- MergeSeurat(subset, merged_bulk_gtex)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Select variable genes on the merged single-cell + bulk + GTEx object
# (default settings).
merged <- FindVariableGenes(merged)
Calculating gene means
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variance to mean ratios
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Scale the merged object (default settings) ahead of the PCA below.
merged <- ScaleData(merged)
Scaling data matrix
|
| | 0%
|
|================================================================================| 100%
# Run PCA on the merged object with default settings; the top genes per
# component are printed below.
merged <- RunPCA(merged)
[1] "PC1"
[1] "UQCR11.1" "MALAT1" "NEAT1" "COX7B" "UQCR10" "DBI" "DDT"
[8] "MGST3" "MTRNR2L8" "ADIRF" "ETFB" "MIF" "ATOX1" "MPC2"
[15] "NDUFB8.1" "HEBP2" "MINOS1" "GCSH" "NDUFC2" "CHCHD10" "MTRNR2L10"
[22] "FDPS" "LINC00116" "CISD3" "CYCS" "NDUFS6" "NDUFAB1" "FABP5"
[29] "FADS1" "CKS1B"
[1] ""
[1] "CD74" "HLA-DRB1" "HLA-DRA" "MT-ND4L" "C1QA" "CLDN5" "BHLHE40"
[8] "NFKBIA" "ADAMTS1" "C11orf96" "C10orf10" "RBP7" "IGFBP3" "MT-ND6"
[15] "C1QB" "ACTA2" "GADD45B" "C3" "GPX3" "SLC2A3" "TM4SF1"
[22] "IFI30" "CSRP1" "C7" "TGM2" "NR4A1" "IFI27" "MT-ATP6"
[29] "HLA-DRB5" "CD163"
[1] ""
[1] ""
[1] "PC2"
[1] "FASN" "RBP4" "G0S2" "DGAT2" "SCD" "GPAM" "LPL"
[8] "ACSS2" "INSIG1" "ACLY" "VKORC1L1" "MT-ND5" "UCP2" "SREBF1"
[15] "MT-ATP6" "CHCHD10" "THRSP" "FABP5" "PDXK" "IDI1" "HSD17B12"
[22] "ALDH1L1" "HK2" "DLD" "FADS1" "HMGCS1" "MVD" "C14orf180"
[29] "LPIN1" "MT-ND6"
[1] ""
[1] "MFAP5" "DCN" "PLAC9" "CST3" "MGP"
[6] "CLDN11" "CFD" "RP11-572C15.6" "FBN1" "PDLIM2"
[11] "IGFBP6" "PDGFRA" "TIMP1" "IGFBP5" "CYP1B1"
[16] "FN1" "TMEM45A" "CRLF1" "EMP3" "MIR4435-1HG"
[21] "SPOCK1" "COL6A3" "LINC00152" "COL1A1" "LOX"
[26] "MARCKS" "FGF7" "OSR2" "ABCA6" "RP11-14N7.2"
[1] ""
[1] ""
[1] "PC3"
[1] "MT2A" "MT1E" "MT1M" "MT1G" "RARRES1" "ITLN1" "KRT8"
[8] "KRT18" "MSLN" "MT1X" "KRT19" "UPK3B" "RPL22L1" "SLPI"
[15] "HSPB1" "TIMP1" "LY6E" "CYBA" "MT1A" "IL6" "ADAMTS4"
[22] "EGR1" "C7" "CISD3" "CXCL2" "CXCL1" "MARCKSL1" "NDUFC2"
[29] "LIF" "FOSB"
[1] ""
[1] "RPS17L" "SLC2A5" "LRP1" "PEG10" "COL12A1" "ELOVL6" "NRCAM" "FADS2"
[9] "DYNC1H1" "MME" "ME1" "ACSS2" "NOTCH2" "ZNF117" "ECHDC1" "COL6A3"
[17] "TNC" "LAMA2" "GPAM" "TEAD1" "DST" "VCAN" "FN1" "SETD7"
[25] "LPIN1" "SVEP1" "ASPH" "DDR2" "COL3A1" "NDUFS1"
[1] ""
[1] ""
[1] "PC4"
[1] "RARRES1" "KRT8" "LIF" "KRT18" "CXCL1" "HP" "MSLN" "THBS1"
[9] "ITLN1" "SLPI" "GFPT2" "KRT19" "UPK3B" "IL6" "IL8" "NAMPT"
[17] "TIMP1" "GREM1" "CCL2" "PIM1" "MT1G" "ADAMTS4" "HMOX1" "ALDH1A3"
[25] "CXCL2" "OGN" "IER3" "FOSB" "SOCS3" "EGR1"
[1] ""
[1] "CXCL14" "LMOD1" "CLEC3B" "WISP2" "MUSTN1" "MYH11" "MMP3"
[8] "ACTG2" "LEP" "MYOC" "CLDN5" "CNN1" "ACTA2" "S100B"
[15] "DES" "COMP" "HLA-DRA" "NR1D1" "HLA-DRB1" "CD74" "PI16"
[22] "HLA-DRB5" "MPZ" "C19orf80" "CSRP1" "CTHRC1" "C10orf10" "DDIT4"
[29] "HSPB7" "RBP4"
[1] ""
[1] ""
[1] "PC5"
[1] "SFRP2" "OGN" "C8orf4" "UPK3B" "CCL21" "IFI6" "NR1D1" "PI16"
[9] "MSLN" "HBA2" "ISG15" "HSPA1B" "GREM1" "HSPA1A" "LY6E" "KRT19"
[17] "MYOC" "MUSTN1" "HBB" "SLC40A1" "ITLN1" "HBD" "CRABP2" "CPXM1"
[25] "PTGDS" "COL1A1" "PCOLCE" "CNN1" "CXCL14" "SLPI"
[1] ""
[1] "MT1X" "MT1A" "MT1M" "MT2A" "MT1E" "LEP" "SAA2" "MT1G"
[9] "MMP19" "HSPB7" "IL6" "FOSB" "CYP4B1" "GLUL" "PFKFB3" "PTX3"
[17] "CD163" "SAA1" "GPX3" "CXCL2" "FASN" "LIF" "ADAMTS4" "PIM1"
[25] "CCL2" "SOCS3" "THBS1" "NAMPT" "CES1" "IL8"
[1] ""
[1] ""
# Elbow plot of the principal components of the merged object.
PCElbowPlot(merged)
# Plot the samples/cells coloured by their dataset of origin (orig.ident).
DimPlot(merged, group.by='orig.ident')
# Drop the GTEx samples again and save the remainder.
# NOTE: this overwrites the `subset` object created earlier from `seurobj`.
subset <- SubsetData(merged, cells.use=rownames(merged@meta.data)[merged@meta.data$orig.ident != 'GTEx'])
saveRDS(subset, '../output/bulk-sc180831-oxidative.ECM-merged')
Add GTEx data.
# Merge the GTEx adipose object into `merged`.
# NOTE(review): `merged` was built from `merged_bulk_gtex` above, so it appears
# to contain GTEx samples already -- confirm this second merge does not add the
# GTEx samples twice.
merged_all <- MergeSeurat(merged, gtex_adipose)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Persist the merged object under the "...-GTEx-merged" name.
# NOTE(review): this saves `merged`, not the `merged_all` object created just
# above -- confirm which object is meant to be written.
saveRDS(merged, file='../output/bulk-sc180831-oxidative.ECM-GTEx-merged')
Bulk + oxidative/ECM top cells
# Combine State.old.labels with the bulk type: start from the bulk `type`
# label and overwrite it with the single-cell state label wherever one exists.
# Both sides are coerced to character so that a factor column cannot inject
# integer codes (factor assigned into character) or NA (unknown factor level).
# NOTE(review): `merged_bulk.topcells` is not created in the visible code.
merged_bulk.topcells@meta.data['state_type_combined'] <- as.character(merged_bulk.topcells@meta.data$type)
labels <- merged_bulk.topcells@meta.data$State.old.labels
merged_bulk.topcells@meta.data$state_type_combined[!is.na(labels)] <- as.character(labels[!is.na(labels)])
# t-SNE coloured by the combined label, then by dataset of origin.
TSNEPlot(merged_bulk.topcells, group.by='state_type_combined')
TSNEPlot(merged_bulk.topcells, group.by='orig.ident')
Bulk + GTEx
Bulk + oxidative/ECM top cells + GTEx